library(tidyr)
library(dplyr)
library(stringr)
library(ggplot2)
# Read the NASCAR driver statistics CSV into a data frame.
df <- read.csv(file = "~/Downloads/nascar_driver_statistics.csv")

# Quick look at the data: first rows, then column types.
head(df)
str(df)
'data.frame':   1111 obs. of  21 variables:
 $ X                   : int  1 2 3 4 5 6 7 8 9 10 ...
 $ Driver              : chr  "Joey Logano" "Ross Chastain" "Kyle Larson" "Ryan Blaney" ...
 $ Wins                : int  2 2 2 0 2 4 1 1 1 2 ...
 $ AvgStart            : num  10.9 14.6 8 10.5 12.2 11 15.5 13.7 12 13.4 ...
 $ AvgMidRace          : num  13.5 13.2 14.4 11.4 16.5 10.9 16.4 17.6 20.2 14.2 ...
 $ AvgFinish           : num  13.9 14.2 13.6 13.8 16.1 11.9 16.3 18.5 15.7 16.5 ...
 $ AvgPos              : num  11.9 12.4 12.6 12.1 13.8 11.2 16.2 17.7 17.8 13.2 ...
 $ PassDiff            : int  175 94 268 215 383 299 -79 -71 -209 74 ...
 $ GreenFlagPasses     : int  2950 2793 2774 2686 3144 3064 2919 2999 2668 2606 ...
 $ GreenFlagPassed     : int  2775 2699 2506 2471 2761 2765 2998 3070 2877 2532 ...
 $ QualityPasses       : int  1732 1616 1691 1675 1854 1886 1394 1128 1073 1574 ...
 $ PercentQualityPasses: num  58.7 57.9 61 62.4 59 61.6 47.8 37.6 40.2 60.4 ...
 $ NumFastestLaps      : int  214 388 376 259 338 391 199 179 85 282 ...
 $ LapsInTop15         : int  5746 5691 5806 5571 5306 5747 3485 3598 3270 5630 ...
 $ PercentLapsInTop15  : num  75.1 76.7 79.7 76.8 72.4 80 46.8 48.1 47.1 77.8 ...
 $ LapsLed             : int  538 585 360 457 392 763 247 238 78 713 ...
 $ PercentLapsLed      : num  7 7.9 4.9 6.3 5.3 10.6 3.3 3.2 1.1 9.9 ...
 $ TotalLaps           : int  7647 7415 7282 7252 7332 7180 7442 7488 6943 7232 ...
 $ DriverRating        : num  89.5 92.8 91.8 91.5 87.9 99.3 77.3 69.5 72.9 88.8 ...
 $ Points              : int  3071 3059 3057 3056 3049 3045 3045 3041 3034 3033 ...
 $ Year                : int  2022 2022 2022 2022 2022 2022 2022 2022 2022 2022 ...
# List the column names of the data frame.
names(df)
 [1] "X"                    "Driver"               "Wins"                 "AvgStart"             "AvgMidRace"          
 [6] "AvgFinish"            "AvgPos"               "PassDiff"             "GreenFlagPasses"      "GreenFlagPassed"     
[11] "QualityPasses"        "PercentQualityPasses" "NumFastestLaps"       "LapsInTop15"          "PercentLapsInTop15"  
[16] "LapsLed"              "PercentLapsLed"       "TotalLaps"            "DriverRating"         "Points"              
[21] "Year"                
# DriverRating against AvgPos, one colour per Year.
ggplot(data = df, mapping = aes(x = AvgPos, y = DriverRating)) +
  geom_point(mapping = aes(color = factor(Year)))

# DriverRating against AvgFinish, one colour per Year.
ggplot(data = df, mapping = aes(x = AvgFinish, y = DriverRating)) +
  geom_point(mapping = aes(color = factor(Year)))

# Histogram of QualityPasses (default binning), bars outlined in black.
ggplot(data = df, mapping = aes(x = QualityPasses)) +
  geom_histogram(color = "black")

# Mean DriverRating per Year, drawn as an interactive line chart.
#
# ggplotly() is exported by the plotly package, which this script never
# attaches, so the bare call fails with "could not find function". Calling it
# through its namespace keeps the script runnable without another library()
# line (the same approach the script uses for caTools::sample.split).
plotly::ggplotly(
  df %>%
    group_by(Year) %>%
    summarise(avgRating = mean(DriverRating)) %>%
    arrange(Year) %>%
    # %>% binds tighter than +, so the layer below is added to the ggplot
    # object produced by the pipeline.
    ggplot(aes(x = Year, y = avgRating)) +
    geom_line(linewidth = 1)
)
# Number of rows per Year.
# Use the bare column name: passing df$Year makes dplyr label the result
# column literally `df$Year` and ties the call to the global `df` instead of
# the data argument.
count(df, Year)
# Histogram of DriverRating (default binning), bars outlined in black.
ggplot(data = df, mapping = aes(x = DriverRating)) +
  geom_histogram(color = "black")

# Row(s) with the highest DriverRating in the data set.
# max()/min() are evaluated on the pipeline's own column rather than on the
# global df$DriverRating, and na.rm = TRUE keeps a missing rating from turning
# the comparison into NA (which would silently return zero rows).
df %>%
  filter(DriverRating == max(DriverRating, na.rm = TRUE)) %>%
  select(Driver, DriverRating, Year)

# Row(s) with the lowest DriverRating in the data set.
df %>%
  filter(DriverRating == min(DriverRating, na.rm = TRUE)) %>%
  select(Driver, DriverRating, Year)
# Number of rows per Driver, most frequent first.
# The bare column name keeps the result column called `Driver` (not
# `df$Driver`); sort = TRUE orders by descending n, replacing the separate
# arrange(desc(n)) step.
count(df, Driver, sort = TRUE)
# Bar chart of the ten drivers with the most total Wins across all rows.
df %>%
  group_by(Driver) %>%
  summarise(TotWins = sum(Wins)) %>%
  arrange(desc(TotWins)) %>%
  head(10) %>%
  ggplot(mapping = aes(x = Driver, y = TotWins)) +
  # geom_col() draws bar heights straight from the y values.
  geom_col(mapping = aes(fill = Driver))

library(corrplot)
corrplot 0.95 loaded
# Keep only the numeric columns and plot their pairwise correlations.
# vapply() guarantees a logical mask whatever the input looks like (sapply()
# can change its return type), and drop = FALSE keeps a data frame even when
# only a single column is numeric.
# NOTE(review): numeric_df still contains `X`, which looks like the CSV row
# index (1, 2, 3, ...) -- confirm whether it belongs in the correlation matrix
# and in the model fitted from numeric_df later on.
numeric_df <- df[, vapply(df, is.numeric, logical(1)), drop = FALSE]
corr <- cor(numeric_df)
corrplot(corr, method = "color")

library(corrgram)
# Correlogram of the numeric columns: shaded cells below the diagonal, pies
# above it, min/max values on the diagonal, with the variables reordered.
corrgram(
  numeric_df,
  order = TRUE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  diag.panel = panel.minmax,
  text.panel = panel.txt
)

# Predictor frame (everything except the target and the driver name) and the
# target as a one-column data frame.
X <- select(df, -DriverRating, -Driver)
y <- df["DriverRating"]

# Random 70/30 split flag produced by caTools from the target column.
# NOTE(review): no set.seed() precedes this call, so the split (and every
# number derived from it) changes from run to run -- consider seeding.
split <- caTools::sample.split(y$DriverRating, SplitRatio = 0.7)

# Index with the logical flag directly. This avoids the reassignable T/F
# shorthands and subset()'s non-standard evaluation, which would pick up a
# data column named `split` before the flag vector if one ever existed.
train <- numeric_df[split, , drop = FALSE]
test <- numeric_df[!split, , drop = FALSE]
# Fit a linear model of DriverRating on every other column of the training
# frame, then predict on the held-out rows.
model <- lm(formula = DriverRating ~ ., data = train)
preds <- predict(model, newdata = test)

# Predictions next to the observed values; row names come from `preds`.
results <- data.frame(preds = preds, actual = test$DriverRating)
head(results)

# Residual = observed minus predicted.
results$residual <- results$actual - results$preds
# Residuals against the observed rating, with a red reference line at zero.
ggplot(data = results, mapping = aes(x = actual, y = residual)) +
  geom_point() +
  geom_hline(yintercept = 0, color = "red")

# Predicted against observed rating, points coloured by residual; the red
# line traces y = x over the observed range.
ggplot(data = results, mapping = aes(x = actual, y = preds)) +
  geom_point(mapping = aes(color = residual)) +
  geom_line(mapping = aes(x = actual, y = actual), color = "red")

LS0tCnRpdGxlOiAiTkFTQ0FSIEFuYWx5c2lzIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgpgYGB7cn0KbGlicmFyeSh0aWR5cikKbGlicmFyeShkcGx5cikKbGlicmFyeShzdHJpbmdyKQpsaWJyYXJ5KGdncGxvdDIpCmBgYAoKCmBgYHtyfQpkZiA9IHJlYWQuY3N2KCJ+L0Rvd25sb2Fkcy9uYXNjYXJfZHJpdmVyX3N0YXRpc3RpY3MuY3N2IikKYGBgCgpgYGB7cn0KaGVhZChkZikKYGBgCgoKYGBge3J9CnN0cihkZikKYGBgCmBgYHtyfQpjb2xuYW1lcyhkZikKYGBgCgpgYGB7cn0KZ2dwbG90KGRmLGFlcyh4PUF2Z1Bvcyx5PURyaXZlclJhdGluZykpICsgZ2VvbV9wb2ludChhZXMoY29sb3I9ZmFjdG9yKFllYXIpKSkKYGBgCgpgYGB7cn0KZ2dwbG90KGRmLGFlcyh4PUF2Z0ZpbmlzaCx5PURyaXZlclJhdGluZykpICsgZ2VvbV9wb2ludChhZXMoY29sb3I9ZmFjdG9yKFllYXIpKSkKYGBgCgpgYGB7cn0KZ2dwbG90KGRmLGFlcyh4PVF1YWxpdHlQYXNzZXMpKSArIGdlb21faGlzdG9ncmFtKGNvbG9yPSdibGFjaycpCmBgYApgYGB7cn0KZ2dwbG90bHkoZ2dwbG90KGRmICU+JSBncm91cF9ieShZZWFyKSAlPiUgc3VtbWFyaXNlKGF2Z1JhdGluZyA9IG1lYW4oRHJpdmVyUmF0aW5nKSkgJT4lIGFycmFuZ2UoWWVhciksYWVzKHg9WWVhcix5PWF2Z1JhdGluZykpICsgZ2VvbV9saW5lKGxpbmV3aWR0aD0xKSkKYGBgCmBgYHtyfQpjb3VudChkZixkZiRZZWFyKQpgYGAKCmBgYHtyfQpnZ3Bsb3QoZGYsYWVzKHg9RHJpdmVyUmF0aW5nKSkgKyBnZW9tX2hpc3RvZ3JhbShjb2xvcj0nYmxhY2snKQpgYGAKYGBge3J9CmRmICU+JSBmaWx0ZXIoRHJpdmVyUmF0aW5nID09IG1heChkZiREcml2ZXJSYXRpbmcpKSAlPiUgc2VsZWN0KERyaXZlcixEcml2ZXJSYXRpbmcsWWVhcikKYGBgCmBgYHtyfQpkZiAlPiUgZmlsdGVyKERyaXZlclJhdGluZyA9PSBtaW4oZGYkRHJpdmVyUmF0aW5nKSkgJT4lIHNlbGVjdChEcml2ZXIsRHJpdmVyUmF0aW5nLFllYXIpCmBgYApgYGB7cn0KY291bnQoZGYsZGYkRHJpdmVyKSAlPiUgYXJyYW5nZShkZXNjKG4pKQpgYGAKYGBge3J9CmdncGxvdChkZiAlPiUgZ3JvdXBfYnkoRHJpdmVyKSAlPiUgc3VtbWFyaXNlKFRvdFdpbnMgPSBzdW0oV2lucykpICU+JSBhcnJhbmdlKGRlc2MoVG90V2lucykpICU+JSBoZWFkKDEwKSwgYWVzKHg9RHJpdmVyLHk9VG90V2lucykpICsgZ2VvbV9iYXIoc3RhdCA9ICdpZGVudGl0eScsYWVzKGZpbGw9RHJpdmVyKSkKYGBgCgpgYGB7cn0KbGlicmFyeShjb3JycGxvdCkKYGBgCgpgYGB7cn0KbnVtZXJpY19kZiA8LSBkZlssIHNhcHBseShkZiwgaXMubnVtZXJpYyldCmNvcnIgPSBjb3IobnVtZXJpY19kZikKYGBgCgpgYGB7cn0KY29ycnBsb3QoY29yciwgbWV0aG9kID0gJ2NvbG9yJykKYGBgCgpgYGB7cn0KbGlicmFyeShjb3JyZ3JhbSkKYGBgCgpgYGB7cn0KY29ycmdyYW0obnVtZXJpY19kZiwgb3JkZXI9VFJVRSwKICAgICAgICAgbG93ZXIucGFuZWw9cGFuZWwuc2hhZGUsIHVwcGVy
LnBhbmVsPXBhbmVsLnBpZSwKICAgICAgICAgZGlhZy5wYW5lbD1wYW5lbC5taW5tYXgsIHRleHQucGFuZWw9cGFuZWwudHh0KQpgYGAKYGBge3J9ClggPSBzZWxlY3QoZGYsLURyaXZlclJhdGluZywtRHJpdmVyKQpgYGAKCmBgYHtyfQp5ID0gZGZbJ0RyaXZlclJhdGluZyddCmBgYAoKYGBge3J9CnNwbGl0ID0gY2FUb29sczo6c2FtcGxlLnNwbGl0KHkkRHJpdmVyUmF0aW5nLFNwbGl0UmF0aW8gPSAwLjcpCmBgYAoKYGBge3J9CnRyYWluID0gc3Vic2V0KG51bWVyaWNfZGYsIHNwbGl0ID09IFQpCnRlc3QgPSBzdWJzZXQobnVtZXJpY19kZiwgc3BsaXQgPT0gRikKYGBgCgpgYGB7cn0KbW9kZWwgPSBsbShEcml2ZXJSYXRpbmcgfiAuLGRhdGE9dHJhaW4pCmBgYAoKYGBge3J9CnByZWRzID0gcHJlZGljdChtb2RlbCx0ZXN0KQpgYGAKCmBgYHtyfQpyZXN1bHRzID0gZGF0YS5mcmFtZShjYmluZChwcmVkcywgYWN0dWFsID0gdGVzdCREcml2ZXJSYXRpbmcpKQpgYGAKCgpgYGB7cn0KaGVhZChyZXN1bHRzKQpgYGAKCmBgYHtyfQpyZXN1bHRzJHJlc2lkdWFsID0gcmVzdWx0cyRhY3R1YWwtcmVzdWx0cyRwcmVkcwpgYGAKCmBgYHtyfQpnZ3Bsb3QoKSArIGdlb21fcG9pbnQoZGF0YSA9IHJlc3VsdHMsYWVzKHg9YWN0dWFsLHk9cmVzaWR1YWwpKSArIGdlb21faGxpbmUoeWludGVyY2VwdCA9IDAsY29sb3I9J3JlZCcpCmBgYAoKYGBge3J9CmdncGxvdChyZXN1bHRzLGFlcyh4PWFjdHVhbCx5PXByZWRzKSkgKyBnZW9tX3BvaW50KGFlcyhjb2xvcj1yZXNpZHVhbCkpICsgZ2VvbV9saW5lKGFlcyh4PWFjdHVhbCx5PWFjdHVhbCksY29sb3I9J3JlZCcpCmBgYA==